import os
import platform
import sys
from sklearn.cluster import KMeans
import numpy as np


# 15: data cleaning and dimensionality reduction
def drop_clean(input_file, line, compression_ratio):
    """
    Reduce the first ``line`` values of a data file to K-Means cluster centers.

    The file content is evaluated as a Python expression that is expected to
    yield a sliceable sequence of numbers. The first ``line`` items are
    treated as one-dimensional samples and clustered into
    ``len(items) // compression_ratio`` clusters (at least one when the ratio
    is positive); the cluster centers are returned.

    :param input_file: path of the file to read
    :param line: number of leading items to keep (anything ``int()`` accepts)
    :param compression_ratio: compression ratio, i.e. roughly how many input
        values are merged into one output value (anything ``int()`` accepts)
    :return: list of cluster centers in the order KMeans reports them
        (not sorted); an empty list when there is nothing to cluster
    :raises ZeroDivisionError: if ``compression_ratio`` is 0 and there is
        data to cluster
    """

    # Read everything up front so the file is closed before the (potentially
    # slow) clustering step runs.
    with open(input_file) as file_obj:
        raw_text = file_obj.read()

    line_count = int(line)
    ratio = int(compression_ratio)

    # NOTE(security): eval() executes whatever expression the file contains.
    # Only use this on trusted files; if the data is always a plain literal,
    # ast.literal_eval would be the safe replacement.
    values = eval(raw_text)[0:line_count]

    # Number of items actually read.
    length = len(values)
    if length == 0:
        # Nothing to cluster; KMeans would reject an empty sample set.
        return []

    n_clusters = length // ratio
    if ratio > 0 and n_clusters < 1:
        # Fewer items than the ratio: collapse them into a single center
        # instead of asking KMeans for zero clusters.
        n_clusters = 1

    samples = np.array(values).reshape(-1, 1)  # one sample per row
    km = KMeans(n_clusters=n_clusters).fit(samples)

    # cluster_centers_ has shape (n_clusters, 1); flatten it to a plain list.
    return list(km.cluster_centers_.reshape(-1))


if __name__ == '__main__':
    # Command-line usage: <script> <input_file> <line> <compression_ratio>
    # The three positional arguments are passed through as strings;
    # drop_clean converts the numeric ones itself.
    input_path = sys.argv[1]
    line_count = sys.argv[2]
    ratio = sys.argv[3]
    result = drop_clean(input_path, line_count, ratio)
    print(result)
